# -*- coding: utf-8 -*-
import json
from urllib.parse import quote

import scrapy

from TaoBao.items import TaoBaoItem
class TaobaoSpider(scrapy.Spider):
    """Spider that queries the s.m.taobao.com search endpoint and yields items.

    For every keyword in ``search_keywords`` it requests pages
    1..``max_pages`` of the search URL and converts each entry of the
    ``listItem`` array in the JSON response into a ``TaoBaoItem``.
    """

    name = 'taobao'
    allowed_domains = ['taobao.com']
    # Not referenced anywhere in this class; kept so external code that reads
    # the attribute keeps working.
    start_url = 'https://s.m.taobao.com/h5'

    # Search keywords; each one is crawled for ``max_pages`` result pages.
    search_keywords = [
        'aape', 'ins 卫衣', '日系风衣', '港系风衣男', '沙发垫', '库里', '杜兰特', '詹姆斯', '小米手机',
        '华为手机', '摄像机', '投影仪', '死飞', '游戏', 'DIY', '手表', '眼镜', '乐器', '家电', '箱包',
        '高智商玩具', '宠物', '足球鞋', '安德玛库里4', 'AJ', 'Python书籍', 'Linux书籍', '李宁重燃',
    ]
    # Number of result pages requested per keyword (pages are 1-based).
    max_pages = 100
    # ``{keyword}`` receives the percent-encoded keyword, ``{page}`` the page number.
    search_url_template = (
        'https://s.m.taobao.com/search?q={keyword}'
        '&sst=1&n=20&buying=buyitnow&m=api4h5&abtest=15&wlsort=15&page={page}'
    )

    def start_requests(self):
        """Yield one request per (keyword, page) combination.

        Requests carry no explicit callback, so Scrapy routes the responses
        to ``parse``.
        """
        for keyword in self.search_keywords:
            # Percent-encode so spaces, non-ASCII text and characters such as
            # '&' or '#' cannot corrupt the query string.
            encoded_keyword = quote(keyword, safe='')
            for page in range(1, self.max_pages + 1):
                url = self.search_url_template.format(
                    keyword=encoded_keyword, page=page)
                yield scrapy.Request(url=url)

    def parse(self, response):
        """Convert one search response into ``TaoBaoItem`` objects.

        The body is decoded as UTF-8 JSON. Responses that are not valid JSON,
        are not a JSON object, or have no ``listItem`` entries yield nothing.
        An entry that lacks an expected key or whose price / sold count cannot
        be converted is logged and skipped; the remaining entries of the page
        are still processed.
        """
        try:
            res_dict = json.loads(response.body.decode('utf-8'))
        except ValueError as exc:
            # Covers both UnicodeDecodeError and JSON decoding errors.
            self.logger.warning(
                'Skipping undecodable response from %s: %s', response.url, exc)
            return

        if not isinstance(res_dict, dict):
            return

        # ``or []`` also covers a present-but-null ``listItem``.
        for entry in res_dict.get('listItem') or []:
            try:
                original_price = float(entry['originalPrice'])
                current_price = float(entry['price'])
                sold_count = int(entry['sold'])

                item = TaoBaoItem()
                item['title'] = entry['title']        # title
                item['addr'] = entry['location']      # location
                item['oprice'] = original_price       # original price
                item['pprice'] = current_price        # current price
                item['img_url'] = entry['pic_path']   # image URL
                item['company'] = entry['nick']       # shop
                item['paynum'] = sold_count           # number of buyers
            except (KeyError, TypeError, ValueError) as exc:
                self.logger.warning(
                    'Skipping malformed entry from %s: %r', response.url, exc)
                continue
            yield item






